{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "857c5be0-9b2c-438e-8f05-6246f5946204",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "621096b6-7eb7-4aab-a231-ba464731a615",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-llms-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1f054d5-9704-4e56-bd8d-9c58235c28e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"sk-\"\n",
    "\n",
    "import nest_asyncio\n",
    "\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e9b4ccd-c6af-44d1-9726-b8fdaa40eac0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "from llama_index.core.tools import QueryEngineTool, ToolMetadata\n",
    "from llama_index.core.query_engine import SubQuestionQueryEngine\n",
    "from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler\n",
    "from llama_index.core import Settings\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f04ddb5-6dcd-4f1b-acdf-eeb78d87b380",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LLM (gpt-3.5)\n",
    "gpt35 = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
    "\n",
    "# LLM (gpt-4)\n",
    "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e426703-b6cb-4932-8e41-05f3568a1d5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "%mkdir -p 'data/paul_graham/'\n",
    "%wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5878aa8e-5dd1-438c-a597-9c751468e93a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2414fdf-7304-495f-b91e-4c8e881adc7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using the LlamaDebugHandler to print the trace of the sub questions\n",
    "# captured by the SUB_QUESTION callback event type\n",
    "llama_debug = LlamaDebugHandler(print_trace_on_end=True)\n",
    "callback_manager = CallbackManager([llama_debug])\n",
    "\n",
    "Settings.callback_manager = callback_manager"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3699ad64-acb3-4322-8fb1-0c1e59eaa53a",
   "metadata": {},
   "outputs": [],
   "source": [
    "index = VectorStoreIndex.from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4a9b8ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.indices.query.query_transform.base import (\n",
    "    StepDecomposeQueryTransform,\n",
    ")\n",
    "\n",
    "# gpt-4\n",
    "step_decompose_transform = StepDecomposeQueryTransform(llm=gpt4, verbose=True)\n",
    "\n",
    "# gpt-3\n",
    "step_decompose_transform_gpt3 = StepDecomposeQueryTransform(\n",
    "    llm=gpt35, verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5808c79",
   "metadata": {},
   "outputs": [],
   "source": [
    "index_summary = \"Used to answer questions about the author\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3219ee9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "from llama_index.core.query_engine import MultiStepQueryEngine\n",
    "\n",
    "query_engine = index.as_query_engine(llm=gpt4)\n",
    "query_engine = MultiStepQueryEngine(\n",
    "    query_engine=query_engine,\n",
    "    query_transform=step_decompose_transform,\n",
    "    index_summary=index_summary,\n",
    ")\n",
    "response_gpt4 = query_engine.query(\n",
    "    \"What is the article about?\", \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4effe830",
   "metadata": {},
   "outputs": [],
   "source": [
    "# extract the most new query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51ac81e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# build index and query engine\n",
    "vector_query_engine = VectorStoreIndex.from_documents(\n",
    "    documents,\n",
    "    use_async=True,\n",
    ").as_query_engine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c44f8cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# setup base query engine as tool\n",
    "query_engine_tools = [\n",
    "    QueryEngineTool(\n",
    "        query_engine=vector_query_engine,\n",
    "        metadata=ToolMetadata(\n",
    "            name=\"documents\",\n",
    "            description=\"Paul Graham essay on What I Worked On\",\n",
    "        ),\n",
    "    ),\n",
    "]\n",
    "\n",
    "query_engine = SubQuestionQueryEngine.from_defaults(\n",
    "    query_engine_tools=query_engine_tools,\n",
    "    use_async=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "427424fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "response = query_engine.query(\n",
    "    # instead of this, use extracted query \n",
    "    #To Do: reference newest query here \n",
    "    \"What is the specific topic of this article by Paul Graham?\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41c2c713",
   "metadata": {},
   "outputs": [],
   "source": [
    "# iterate through sub_question items captured in SUB_QUESTION event\n",
    "from llama_index.core.callbacks import CBEventType, EventPayload\n",
    "\n",
    "for i, (start_event, end_event) in enumerate(\n",
    "    llama_debug.get_event_pairs(CBEventType.SUB_QUESTION)\n",
    "):\n",
    "    qa_pair = end_event.payload[EventPayload.SUB_QUESTION]\n",
    "    print(\"Sub Question \" + str(i) + \": \" + qa_pair.sub_q.sub_question.strip())\n",
    "    print(\"Answer: \" + qa_pair.answer.strip())\n",
    "    print(\"====================================\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33ba9435",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(response)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
